# vjd 8/12/14 updated for plos submission
# vjd 2/23/11
# obtains the results from the bivariate logit model
# this bit of code is to be run after "calc_dist.r" is run
# NOTE: "calc_dist.r" has been updated to not call the write.dta function
# NOTE: "calc_dist.r" has been updated to include 4 classes of DV

library(foreign)
library(VGAM)
library(ggplot2)
library(extrafont)

rm(list=ls())

setwd("/Users/vjdorazio/Desktop/Sequence_Analysis_paper/plos_version/data")
#mydata <- read.dta("monthly_seq_analysis.dta")
mydata <- read.dta("seq_analysis_111914.dta")


y1 <- vector(mode="numeric",length=nrow(mydata))
y2 <-vector(mode="numeric",length=nrow(mydata))
mydata <- cbind(mydata,y1,y2)
mydata$y1[mydata$Dyad_Type==3] <- 1 #y1 conflict
mydata$y2[mydata$Dyad_Type==3] <- 1 #y2 conflict
mydata$y1[mydata$Dyad_Type==2] <- 1 #y1 conflict
mydata$y2[mydata$Dyad_Type==1] <- 1 #y2 conflict

mydata$Dyad_Type[mydata$y1==0 & mydata$y2==0] <- 0
mydata$Dyad_Type[(mydata$y1==1 & mydata$y2==0)|mydata$y1==0 & mydata$y2==1] <- 1
mydata$Dyad_Type[mydata$y1==1 & mydata$y2==1] <- 2

mydata$Euc_A_Verb_Coop <- as.numeric(mydata$Euc_A_Verb_Coop) 
mydata$Euc_A_Mater_Coop <- as.numeric(mydata$Euc_A_Mater_Coop)
mydata$Euc_A_Verb_Confl <- as.numeric(mydata$Euc_A_Verb_Confl)
mydata$Euc_A_Mater_Confl <- as.numeric(mydata$Euc_A_Mater_Confl)
mydata$Euc_B_Verb_Coop <- as.numeric(mydata$Euc_B_Verb_Coop)
mydata$Euc_B_Mater_Coop <- as.numeric(mydata$Euc_B_Mater_Coop)
mydata$Euc_B_Verb_Confl <- as.numeric(mydata$Euc_B_Verb_Confl)
mydata$Euc_B_Mater_Confl <- as.numeric(mydata$Euc_B_Mater_Confl)

mydata$Lev_A_Verb_Coop <- as.numeric(mydata$Lev_A_Verb_Coop)
mydata$Lev_A_Mater_Coop <- as.numeric(mydata$Lev_A_Mater_Coop)
mydata$Lev_A_Verb_Confl <- as.numeric(mydata$Lev_A_Verb_Confl)
mydata$Lev_A_Mater_Confl <- as.numeric(mydata$Lev_A_Mater_Confl)
mydata$Lev_B_Verb_Coop <- as.numeric(mydata$Lev_B_Verb_Coop)
mydata$Lev_B_Mater_Coop <- as.numeric(mydata$Lev_B_Mater_Coop)
mydata$Lev_B_Verb_Confl <- as.numeric(mydata$Lev_B_Verb_Confl)
mydata$Lev_B_Mater_Confl <- as.numeric(mydata$Lev_B_Mater_Confl)

mydata$mi_A_Verb_Coop <- as.numeric(mydata$mi_A_Verb_Coop)
mydata$mi_A_Mater_Coop <- as.numeric(mydata$mi_A_Mater_Coop)
mydata$mi_A_Verb_Confl <- as.numeric(mydata$mi_A_Verb_Confl)
mydata$mi_A_Mater_Confl <- as.numeric(mydata$mi_A_Mater_Confl)
mydata$mi_B_Verb_Coop <- as.numeric(mydata$mi_B_Verb_Coop)
mydata$mi_B_Mater_Coop <- as.numeric(mydata$mi_B_Mater_Coop)
mydata$mi_B_Verb_Confl <- as.numeric(mydata$mi_B_Verb_Confl)
mydata$mi_B_Mater_Confl <- as.numeric(mydata$mi_B_Mater_Confl)

mydata$Other_Crises <- as.numeric(mydata$Other_Crises)

mydata <- cbind(mydata,1)

mydata$Dyad_Type <- as.factor(mydata$Dyad_Type)
data.0 <- mydata[which(mydata$Dyad_Type==0),]
data.1 <- mydata[which(mydata$Dyad_Type==1),]
data.2 <- mydata[which(mydata$Dyad_Type==2),]


##############################
## distance histograms
#############################


main.0 <- "Type 0 Pairs"
main.1 <- "Type 1 Pairs"
main.2 <- "Type 2 Pairs"

my.xlim <- c(0,70)
my.ylim <- c(0,1)
par(mfrow=c(1,3))

hist(data.0$Euc_A_Verb_Coop, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.0, xlab="")
hist(data.1$Euc_A_Verb_Coop, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.1, xlab="")
hist(data.2$Euc_A_Verb_Coop, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.2, xlab="")


my.xlim <- c(0,30)
my.ylim <- c(0,.8)
par(mfrow=c(1,3))

hist(data.0$Euc_A_Verb_Confl, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.0, xlab="")
hist(data.1$Euc_A_Verb_Confl, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.1, xlab="")
hist(data.2$Euc_A_Verb_Confl, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.2, xlab="")


my.xlim <- c(0,30)
my.ylim <- c(0,1.7)
par(mfrow=c(1,3))

hist(data.0$Euc_A_Mater_Coop, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.0, xlab="")
hist(data.1$Euc_A_Mater_Coop, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.1, xlab="")
hist(data.2$Euc_A_Mater_Coop, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.2, xlab="")


my.xlim <- c(0,70)
my.ylim <- c(0,.8)
par(mfrow=c(1,3))

hist(data.0$Euc_A_Mater_Confl, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.0, xlab="")
hist(data.1$Euc_A_Mater_Confl, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.1, xlab="")
hist(data.2$Euc_A_Mater_Confl, xlim=my.xlim, ylim=my.ylim, freq=FALSE, main=main.2, xlab="")

## all distances
all.0 <- data.0$Euc_A_Mater_Confl + data.0$Euc_A_Mater_Coop + data.0$Euc_A_Verb_Confl + data.0$Euc_A_Verb_Coop +
data.0$Euc_B_Mater_Confl + data.0$Euc_B_Mater_Coop + data.0$Euc_B_Verb_Confl + data.0$Euc_B_Verb_Coop

all.1 <- data.1$Euc_A_Mater_Confl + data.1$Euc_A_Mater_Coop + data.1$Euc_A_Verb_Confl + data.1$Euc_A_Verb_Coop +
data.1$Euc_B_Mater_Confl + data.1$Euc_B_Mater_Coop + data.1$Euc_B_Verb_Confl + data.1$Euc_B_Verb_Coop

all.2 <- data.2$Euc_A_Mater_Confl + data.2$Euc_A_Mater_Coop + data.2$Euc_A_Verb_Confl + data.2$Euc_A_Verb_Coop +
data.2$Euc_B_Mater_Confl + data.2$Euc_B_Mater_Coop + data.2$Euc_B_Verb_Confl + data.2$Euc_B_Verb_Coop


plotdata <- data.frame(vals=all.0, Pairs="Type 0")
t <- data.frame(vals=all.1, Pairs="Type 1")
t1 <- data.frame(vals=all.2, Pairs="Type 2")
plotdata <- rbind(plotdata, t, t1)

p<-ggplot(plotdata, aes(vals, colour = Pairs)) + geom_density() + xlab("Euclidean Distance") + ylab("Density") + guides(colour=guide_legend(title=NULL)) + ylim(0, .15) + xlim(0,200)
p<-p + theme(text = element_text(size = rel(4.5), angle = 00)) + theme(legend.text=element_text(size=rel(4)))

#p<-ggplot(plotdata, aes(vals, fill = Pairs)) + geom_density(alpha = 0.4) + xlab("Euclidean Distance") + ylab("Density") + guides(fill=guide_legend(title=NULL)) + ylim(0, .15) + xlim(0,200)
#p<-p + theme(text = element_text(size = rel(4.5), angle = 00)) + theme(legend.text=element_text(size=rel(4)))

## plot in R, save as pdf.  open pdf in preview, save as tiff.  open tiff, save without alpha channel.  open without alpha channel, compress tiff.
p

loadfonts(device="postscript")

#setEPS()
postscript(paste(plotpath, "/all_hist_week.eps", sep=""), width=7, height=7, family="Arial", paper="special", onefile=FALSE, horizontal=FALSE)
p
dev.off()



#################################
## using VGAM for bivariate logit

# first we need a variable for training data
set.seed(4422)
train <- vector(mode="numeric",length=nrow(mydata))
for (i in 1:nrow(mydata)) {
    if(runif(1)>=0.5) {
        train[i] <- TRUE
    }
}

mydata <- cbind(mydata,train)


train.data <- mydata[which(mydata$train==TRUE),]
pred.data <- mydata[which(mydata$train==FALSE),]

## Models

# Euclidean
euc.fit = vglm(cbind(y1,y2) ~ Euc_A_Verb_Coop + Euc_A_Mater_Coop + Euc_A_Verb_Confl + Euc_A_Mater_Confl + 
    Euc_B_Verb_Coop + Euc_B_Mater_Coop + Euc_B_Verb_Confl + Euc_B_Mater_Confl + Other_Crises, 
    binom2.or(exchangeable=TRUE), data=train.data)

euc.pred.fit <- predict(euc.fit,newdata=pred.data,type="response")

# Levenstein
lev.fit = vglm(cbind(y1,y2) ~ Lev_A_Verb_Coop + Lev_A_Mater_Coop + Lev_A_Verb_Confl + Lev_A_Mater_Confl +
Lev_B_Verb_Coop + Lev_B_Mater_Coop + Lev_B_Verb_Confl + Lev_B_Mater_Confl + Other_Crises,
binom2.or(exchangeable=TRUE), data=train.data)

lev.pred.fit <- predict(lev.fit,newdata=pred.data,type="response")

#Mutual Information
mi.fit = vglm(cbind(y1,y2) ~ mi_A_Verb_Coop + mi_A_Mater_Coop + mi_A_Verb_Confl + mi_A_Mater_Confl +
mi_B_Verb_Coop + mi_B_Mater_Coop + mi_B_Verb_Confl + mi_B_Mater_Confl + Other_Crises,
binom2.or(exchangeable=TRUE), data=train.data)

mi.pred.fit <- predict(mi.fit,newdata=pred.data,type="response")

#######################
## Clearly not the cleanest way to do this...
## Our three predicted probabilities for each model
# Euclidean
Euc_pred_cat_0 <- vector(mode="integer",length=nrow(pred.data))
Euc_pred_cat_1 <- vector(mode="integer",length=nrow(pred.data))
Euc_pred_cat_2 <- vector(mode="integer",length=nrow(pred.data))

euc.0 <- euc.pred.fit[,1]
euc.1 <- euc.pred.fit[,2]+euc.pred.fit[,3]
euc.2 <- euc.pred.fit[,4]

Euc_pred_cat_0[euc.0 > euc.1 & euc.0 > euc.2] <- 1
Euc_pred_cat_1[euc.1 > euc.0 &  euc.1 > euc.2] <- 1
Euc_pred_cat_2[euc.2 > euc.0 &  euc.2 > euc.1] <- 1


# Levenstein
Lev_pred_cat_0 <- vector(mode="integer",length=nrow(pred.data))
Lev_pred_cat_1 <- vector(mode="integer",length=nrow(pred.data))
Lev_pred_cat_2 <- vector(mode="integer",length=nrow(pred.data))

lev.0 <- lev.pred.fit[,1]
lev.1 <- lev.pred.fit[,2]+lev.pred.fit[,3]
lev.2 <- lev.pred.fit[,4]

Lev_pred_cat_0[lev.0 > lev.1 &  lev.0 > lev.2] <- 1
Lev_pred_cat_1[lev.1 > lev.0 &  lev.1 > lev.2] <- 1
Lev_pred_cat_2[lev.2 > lev.0 &  lev.2 > lev.1] <- 1


# Mutual Information
mi_pred_cat_0 <- vector(mode="integer",length=nrow(pred.data))
mi_pred_cat_1 <- vector(mode="integer",length=nrow(pred.data))
mi_pred_cat_2 <- vector(mode="integer",length=nrow(pred.data))


mi.0 <- mi.pred.fit[,1]
mi.1 <- mi.pred.fit[,2]+mi.pred.fit[,3]
mi.2 <- mi.pred.fit[,4]


mi_pred_cat_0[mi.0 > mi.1 &  mi.0 > mi.2] <- 1
mi_pred_cat_1[mi.1 > mi.0 &  mi.1 > mi.2] <- 1
mi_pred_cat_2[mi.2 > mi.0 &  mi.2 > mi.1] <- 1
############################


## NOTE: mydata is redefined so that I can reuse code from another test
rm(mydata)
mydata <- pred.data
mydata <- cbind(mydata,Euc_pred_cat_0,Euc_pred_cat_1,Euc_pred_cat_2, Lev_pred_cat_0, Lev_pred_cat_1, Lev_pred_cat_2, mi_pred_cat_0, mi_pred_cat_1, mi_pred_cat_2)
attach(mydata)


#######################
## Correctly Classified

# Euclidean
Euc_pred_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_correct)
Euc_pred_correct[(Euc_pred_cat_0 == 1 & Dyad_Type == 0) | (Euc_pred_cat_1 == 1 & Dyad_Type == 1) | (Euc_pred_cat_2 == 2 & Dyad_Type == 2)] <- TRUE
Euc_correct_cat <- sum(Euc_pred_correct) / nrow(mydata)

# Levenstein
Lev_pred_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Lev_pred_correct)
Lev_pred_correct[(Lev_pred_cat_0 == 1 & Dyad_Type == 0) | (Lev_pred_cat_1 == 1 & Dyad_Type == 1) | (Lev_pred_cat_2 == 2 & Dyad_Type == 2)] <- TRUE
Lev_correct_cat <- sum(Lev_pred_correct) / nrow(mydata)

# Mutual Information
mi_pred_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,mi_pred_correct)
mi_pred_correct[(mi_pred_cat_0 == 1 & Dyad_Type == 0) | (mi_pred_cat_1 == 1 & Dyad_Type == 1) | (mi_pred_cat_2 == 2 & Dyad_Type == 2)] <- TRUE
mi_correct_cat <- sum(mi_pred_correct) / nrow(mydata)


######################################
## Sensitivity and Pos Predict Value 0

# Euclidean
Euc_pred_0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_0_correct)
Euc_pred_0_correct[Euc_pred_cat_0 == 1 & Dyad_Type == 0] <- TRUE
Euc_pos_pred_val_0 <- sum(Euc_pred_0_correct) / sum(Euc_pred_cat_0)
Euc_sensitivity_0  <- sum(Euc_pred_0_correct) / sum(Dyad_Type==0)

# Levenstein
Lev_pred_0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Lev_pred_0_correct)
Lev_pred_0_correct[Lev_pred_cat_0 == 1 & Dyad_Type == 0] <- TRUE
Lev_pos_pred_val_0 <- sum(Lev_pred_0_correct) / sum(Lev_pred_cat_0)
Lev_sensitivity_0  <- sum(Lev_pred_0_correct) / sum(Dyad_Type==0)

# Mutual Information
mi_pred_0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,mi_pred_0_correct)
mi_pred_0_correct[mi_pred_cat_0 == 1 & Dyad_Type == 0] <- TRUE
mi_pos_pred_val_0 <- sum(mi_pred_0_correct) / sum(mi_pred_cat_0)
mi_sensitivity_0  <- sum(mi_pred_0_correct) / sum(Dyad_Type==0)


######################################
## Sensitivity and Pos Predict Value 1

# Euclidean
Euc_pred_1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_1_correct)
Euc_pred_1_correct[Euc_pred_cat_1 == 1 & Dyad_Type == 1] <- TRUE
Euc_pos_pred_val_1 <- sum(Euc_pred_1_correct) / sum(Euc_pred_cat_1)
Euc_sensitivity_1  <- sum(Euc_pred_1_correct) / sum(Dyad_Type==1)

# Levenstein
Lev_pred_1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Lev_pred_1_correct)
Lev_pred_1_correct[Lev_pred_cat_1 == 1 & Dyad_Type == 1] <- TRUE
Lev_pos_pred_val_1 <- sum(Lev_pred_1_correct) / sum(Lev_pred_cat_1)
Lev_sensitivity_1  <- sum(Lev_pred_1_correct) / sum(Dyad_Type==1)

# Mutual Information
mi_pred_1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,mi_pred_1_correct)
mi_pred_1_correct[mi_pred_cat_1 == 1 & Dyad_Type == 1] <- TRUE
mi_pos_pred_val_1 <- sum(mi_pred_1_correct) / sum(mi_pred_cat_1)
mi_sensitivity_1  <- sum(mi_pred_1_correct) / sum(Dyad_Type==1)


######################################
## Sensitivity and Pos Predict Value 2

# Euclidean
Euc_pred_2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_2_correct)
Euc_pred_2_correct[Euc_pred_cat_2 == 1 & Dyad_Type == 2] <- TRUE
Euc_pos_pred_val_2 <- sum(Euc_pred_2_correct) / sum(Euc_pred_cat_2)
Euc_sensitivity_2  <- sum(Euc_pred_2_correct) / sum(Dyad_Type==2)

# Levenstein
Lev_pred_2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Lev_pred_2_correct)
Lev_pred_2_correct[Lev_pred_cat_2 == 1 & Dyad_Type == 2] <- TRUE
Lev_pos_pred_val_2 <- sum(Lev_pred_2_correct) / sum(Lev_pred_cat_2)
Lev_sensitivity_2  <- sum(Lev_pred_2_correct) / sum(Dyad_Type==2)

# Mutual Information
mi_pred_2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,mi_pred_2_correct)
mi_pred_2_correct[mi_pred_cat_2 == 1 & Dyad_Type == 2] <- TRUE
mi_pos_pred_val_2 <- sum(mi_pred_2_correct) / sum(mi_pred_cat_2)
mi_sensitivity_2  <- sum(mi_pred_2_correct) / sum(Dyad_Type==2)


#######################################
## Specificity and Neg Predict Value ~0

# Euclidean
Euc_pred_not0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_not0_correct)
Euc_pred_not0_correct[Euc_pred_cat_0 != 1 & Dyad_Type != 0] <- TRUE
Euc_neg_pred_val_0 <- sum(Euc_pred_not0_correct) / (sum(Euc_pred_cat_1) + sum(Euc_pred_cat_2))
Euc_specificity_0  <- (sum(Euc_pred_1_correct) + sum(Euc_pred_2_correct)) / sum(Dyad_Type != 0)

# Levenstein
Lev_pred_not0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Lev_pred_not0_correct)
Lev_pred_not0_correct[Lev_pred_cat_0 != 1 & Dyad_Type != 0] <- TRUE
Lev_neg_pred_val_0 <- sum(Lev_pred_not0_correct) / (sum(Lev_pred_cat_1) + sum(Lev_pred_cat_2))
Lev_specificity_0  <- (sum(Lev_pred_1_correct) + sum(Lev_pred_2_correct)) / sum(Dyad_Type != 0)

# Mutual Information
mi_pred_not0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,mi_pred_not0_correct)
mi_pred_not0_correct[mi_pred_cat_0 != 1 & Dyad_Type != 0] <- TRUE
mi_neg_pred_val_0 <- sum(mi_pred_not0_correct) / (sum(mi_pred_cat_1) + sum(mi_pred_cat_2))
mi_specificity_0  <- (sum(mi_pred_1_correct) + sum(mi_pred_2_correct)) / sum(Dyad_Type != 0)


#######################################
## Specificity and Neg Predict Value ~1

# Euclidean
Euc_pred_not1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_not1_correct)
Euc_pred_not1_correct[Euc_pred_cat_1 != 1 & Dyad_Type != 1] <- TRUE
Euc_neg_pred_val_1 <- sum(Euc_pred_not1_correct) / (sum(Euc_pred_cat_0) + sum(Euc_pred_cat_2))
Euc_specificity_1  <- (sum(Euc_pred_0_correct) + sum(Euc_pred_2_correct)) / sum(Dyad_Type != 1)

# Levenstein
Lev_pred_not1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Lev_pred_not1_correct)
Lev_pred_not1_correct[Lev_pred_cat_1 != 1 & Dyad_Type != 1] <- TRUE
Lev_neg_pred_val_1 <- sum(Lev_pred_not1_correct) / (sum(Lev_pred_cat_0) + sum(Lev_pred_cat_2))
Lev_specificity_1  <- (sum(Lev_pred_0_correct) + sum(Lev_pred_2_correct)) / sum(Dyad_Type != 1)

# Mutual Information
mi_pred_not1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,mi_pred_not1_correct)
mi_pred_not1_correct[mi_pred_cat_1 != 1 & Dyad_Type != 1] <- TRUE
mi_neg_pred_val_1 <- sum(mi_pred_not1_correct) / (sum(mi_pred_cat_0) + sum(mi_pred_cat_2))
mi_specificity_1  <- (sum(mi_pred_0_correct) + sum(mi_pred_2_correct)) / sum(Dyad_Type != 1)


###############################################
## Specificity Predict and Neg Predict Value ~2

# Euclidean
Euc_pred_not2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_not2_correct)
Euc_pred_not2_correct[Euc_pred_cat_2 != 1 & Dyad_Type != 2] <- TRUE
Euc_neg_pred_val_2 <- sum(Euc_pred_not2_correct) / (sum(Euc_pred_cat_0) + sum(Euc_pred_cat_1))
Euc_specificity_2  <- (sum(Euc_pred_0_correct) + sum(Euc_pred_1_correct)) / sum(Dyad_Type != 2)

# Levenstein
Lev_pred_not2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Lev_pred_not2_correct)
Lev_pred_not2_correct[Lev_pred_cat_2 != 1 & Dyad_Type != 2] <- TRUE
Lev_neg_pred_val_2 <- sum(Lev_pred_not2_correct) / (sum(Lev_pred_cat_0) + sum(Lev_pred_cat_1))
Lev_specificity_2  <- (sum(Lev_pred_0_correct) + sum(Lev_pred_1_correct)) / sum(Dyad_Type != 2)

# Mutual Information
mi_pred_not2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,mi_pred_not2_correct)
mi_pred_not2_correct[mi_pred_cat_2 != 1 & Dyad_Type != 2] <- TRUE
mi_neg_pred_val_2 <- sum(mi_pred_not2_correct) / (sum(mi_pred_cat_0) + sum(mi_pred_cat_1))
mi_specificity_2  <- (sum(mi_pred_0_correct) + sum(mi_pred_1_correct)) / sum(Dyad_Type != 2)


### printing tables for latex

## Performance Measures Using Robust Metrics
out.table <- data.frame(0, 0, 0, 0)
colnames(out.table) <- c("Performance Measure", "Type 0", "Type 1", "Type 2")
out.table[1,] <- c("Sensitivity", Lev_sensitivity_0, Lev_sensitivity_1, Lev_sensitivity_2)
out.table[2,] <- c("Specificity", Lev_specificity_0, Lev_specificity_1, Lev_specificity_2)
out.table[3,] <- c("Pos. Pred. Value", Lev_pos_pred_val_0, Lev_pos_pred_val_1, Lev_pos_pred_val_2)
out.table[4,] <- c("Neg. Pred. Value", Lev_neg_pred_val_0, Lev_neg_pred_val_1, Lev_neg_pred_val_2)
out.table[5,] <- c("Sensitivity", mi_sensitivity_0, mi_sensitivity_1, mi_sensitivity_2)
out.table[6,] <- c("Specificity", mi_specificity_0, mi_specificity_1, mi_specificity_2)
out.table[7,] <- c("Pos. Pred. Value", mi_pos_pred_val_0, mi_pos_pred_val_1, mi_pos_pred_val_2)
out.table[8,] <- c("Neg. Pred. Value", mi_neg_pred_val_0, mi_neg_pred_val_1, mi_neg_pred_val_2)

out.table[,2] <- as.numeric(out.table[,2])*100
out.table[,3] <- as.numeric(out.table[,3])*100
out.table[,4] <- as.numeric(out.table[,4])*100

xtable(out.table, digits=2)


### Confusion Matrix for Levenshtein
out.table2 <- data.frame(0, 0, 0, 0, 0)
colnames(out.table2) <- c("", "Type 0", "Type 1", "Type 2", "Total")
out.table2[1,1:4] <- c("Type 0", length(which(Lev_pred_cat_0==1 & mydata$Dyad_Type==0)), length(which(Lev_pred_cat_1==1 & mydata$Dyad_Type==0)), length(which(Lev_pred_cat_2==1 & mydata$Dyad_Type==0)))
out.table2[2,1:4] <- c("Type 1", length(which(Lev_pred_cat_0==1 & mydata$Dyad_Type==1)), length(which(Lev_pred_cat_1==1 & mydata$Dyad_Type==1)), length(which(Lev_pred_cat_2==1 & mydata$Dyad_Type==1)))
out.table2[3,1:4] <- c("Type 2", length(which(Lev_pred_cat_0==1 & mydata$Dyad_Type==2)), length(which(Lev_pred_cat_1==1 & mydata$Dyad_Type==2)), length(which(Lev_pred_cat_2==1 & mydata$Dyad_Type==2)))

out.table2[,2] <- as.numeric(out.table2[,2])
out.table2[,3] <- as.numeric(out.table2[,3])
out.table2[,4] <- as.numeric(out.table2[,4])

out.table2[4,1:4] <- c("Total", sum(out.table2[1:3,2]), sum(out.table2[1:3,3]), sum(out.table2[1:3,4]))
out.table2[1:3,5] <- c(sum(as.numeric(out.table2[1,2:4])), sum(as.numeric(out.table2[2,2:4])), sum(as.numeric(out.table2[3,2:4])))
out.table2[4,5] <- sum(as.numeric(out.table2[4,2:4]))

xtable(out.table2)

### Confusion Matrix for Mutual Information

out.table3 <- data.frame(0, 0, 0, 0, 0)
colnames(out.table3) <- c("", "Type 0", "Type 1", "Type 2", "Total")
out.table3[1,1:4] <- c("Type 0", length(which(mi_pred_cat_0==1 & mydata$Dyad_Type==0)), length(which(mi_pred_cat_1==1 & mydata$Dyad_Type==0)), length(which(mi_pred_cat_2==1 & mydata$Dyad_Type==0)))
out.table3[2,1:4] <- c("Type 1", length(which(mi_pred_cat_0==1 & mydata$Dyad_Type==1)), length(which(mi_pred_cat_1==1 & mydata$Dyad_Type==1)), length(which(mi_pred_cat_2==1 & mydata$Dyad_Type==1)))
out.table3[3,1:4] <- c("Type 2", length(which(mi_pred_cat_0==1 & mydata$Dyad_Type==2)), length(which(mi_pred_cat_1==1 & mydata$Dyad_Type==2)), length(which(mi_pred_cat_2==1 & mydata$Dyad_Type==2)))

out.table3[,2] <- as.numeric(out.table3[,2])
out.table3[,3] <- as.numeric(out.table3[,3])
out.table3[,4] <- as.numeric(out.table3[,4])

out.table3[4,1:4] <- c("Total", sum(out.table3[1:3,2]), sum(out.table3[1:3,3]), sum(out.table3[1:3,4]))
out.table3[1:3,5] <- c(sum(as.numeric(out.table3[1,2:4])), sum(as.numeric(out.table3[2,2:4])), sum(as.numeric(out.table3[3,2:4])))
out.table3[4,5] <- sum(as.numeric(out.table3[4,2:4]))

xtable(out.table3)



### Euclidean Performance Measures

out.table4 <- data.frame(0, 0, 0, 0)
colnames(out.table4) <- c("Performance Measure", "Type 0", "Type 1", "Type 2")
out.table4[1,] <- c("Sensitivity", Euc_sensitivity_0, Euc_sensitivity_1, Euc_sensitivity_2)
out.table4[2,] <- c("Specificity", Euc_specificity_0, Euc_specificity_1, Euc_specificity_2)
out.table4[3,] <- c("Pos. Pred. Value", Euc_pos_pred_val_0, Euc_pos_pred_val_1, Euc_pos_pred_val_2)
out.table4[4,] <- c("Neg. Pred. Value", Euc_neg_pred_val_0, Euc_neg_pred_val_1, Euc_neg_pred_val_2)

out.table4[,2] <- as.numeric(out.table4[,2])*100
out.table4[,3] <- as.numeric(out.table4[,3])*100
out.table4[,4] <- as.numeric(out.table4[,4])*100

xtable(out.table4, digits=2)


### Euclidean Confusion Matrix

out.table5 <- data.frame(0, 0, 0, 0, 0)
colnames(out.table5) <- c("", "Type 0", "Type 1", "Type 2", "Total")
out.table5[1,1:4] <- c("Type 0", length(which(Euc_pred_cat_0==1 & mydata$Dyad_Type==0)), length(which(Euc_pred_cat_1==1 & mydata$Dyad_Type==0)), length(which(Euc_pred_cat_2==1 & mydata$Dyad_Type==0)))
out.table5[2,1:4] <- c("Type 1", length(which(Euc_pred_cat_0==1 & mydata$Dyad_Type==1)), length(which(Euc_pred_cat_1==1 & mydata$Dyad_Type==1)), length(which(Euc_pred_cat_2==1 & mydata$Dyad_Type==1)))
out.table5[3,1:4] <- c("Type 2", length(which(Euc_pred_cat_0==1 & mydata$Dyad_Type==2)), length(which(Euc_pred_cat_1==1 & mydata$Dyad_Type==2)), length(which(Euc_pred_cat_2==1 & mydata$Dyad_Type==2)))

out.table5[,2] <- as.numeric(out.table5[,2])
out.table5[,3] <- as.numeric(out.table5[,3])
out.table5[,4] <- as.numeric(out.table5[,4])

out.table5[4,1:4] <- c("Total", sum(out.table5[1:3,2]), sum(out.table5[1:3,3]), sum(out.table5[1:3,4]))
out.table5[1:3,5] <- c(sum(as.numeric(out.table5[1,2:4])), sum(as.numeric(out.table5[2,2:4])), sum(as.numeric(out.table5[3,2:4])))
out.table5[4,5] <- sum(as.numeric(out.table5[4,2:4]))

xtable(out.table5)


detach(mydata)


##################################
## hidden conflict analysis

# note: much of this is hard coded

onsets <- c("BGD", "CHN", "FJI", "IDN", "IND", "KHM", "KOR", "LAO", "LKA", "MDG", "MMR", "NPL", "PHL", "SLB", "THA")
all <- c(3,1,2,11,5,2,1,1,1,4,5,4,3,3,7)
all <- all/53

mis1 <- mydata[which(mydata$Euc_pred_cat_0==1),]
mis2 <- mydata[which(mydata$Euc_pred_cat_0==1),]

mis1 <- mis1[which(mis1$Dyad_Type==1),]
mis2 <- mis2[which(mis2$Dyad_Type==2),]

s.2 <- c(substr(mis2$State_Week_1,1,3), substr(mis2$State_Week_2,1,3))
CHN.2 <- sum(s.2=="CHN") / length(s.2)
BGD.2 <- sum(s.2=="BGD") / length(s.2)
FJI.2 <- sum(s.2=="FJI") / length(s.2)
KOR.2 <- sum(s.2=="KOR") / length(s.2)
KHM.2 <- sum(s.2=="KHM") / length(s.2)
LAO.2 <- sum(s.2=="LAO") / length(s.2)
MDG.2 <- sum(s.2=="MDG") / length(s.2)
MMR.2 <- sum(s.2=="MMR") / length(s.2)
NPL.2 <- sum(s.2=="NPL") / length(s.2)
SLB.2 <- sum(s.2=="SLB") / length(s.2)
THA.2 <- sum(s.2=="THA") / length(s.2)

t2 <- c(BGD.2, CHN.2, FJI.2, 0, 0, KHM.2, KOR.2, LAO.2, 0, MDG.2, MMR.2, NPL.2, 0, SLB.2, THA.2)

s.1 <- c(substr(mis1$State_Week_1[mis1$y1==1],1,3), substr(mis1$State_Week_2[mis1$y2==1],1,3))
CHN.1 <- sum(s.1=="CHN") / length(s.1)
BGD.1 <- sum(s.1=="BGD") / length(s.1)
FJI.1 <- sum(s.1=="FJI") / length(s.1)
KOR.1 <- sum(s.1=="KOR") / length(s.1)
KHM.1 <- sum(s.1=="KHM") / length(s.1)
LAO.1 <- sum(s.1=="LAO") / length(s.1)
MDG.1 <- sum(s.1=="MDG") / length(s.1)
MMR.1 <- sum(s.1=="MMR") / length(s.1)
NPL.1 <- sum(s.1=="NPL") / length(s.1)
SLB.1 <- sum(s.1=="SLB") / length(s.1)
THA.1 <- sum(s.1=="THA") / length(s.1)

t1 <- c(BGD.1, CHN.1, FJI.1, 0, 0, KHM.1, KOR.1, LAO.1, 0, MDG.1, MMR.1, NPL.1, 0, SLB.1, THA.1)

bp.data <- data.frame(cbind(all, t1, t2))
bp.data <- cbind(onsets, bp.data)


## plot in R, save as pdf.  open pdf in preview, save as tiff.  open tiff, save without alpha channel.  open without alpha channel, compress tiff.
p


setEPS()
postscript(paste(plotpath, "/misses2.eps", sep=""), width=14, height=7, family="Times")
barplot(t(bp.data[-1]), main="", col=c("black","azure4","azure3"), beside=TRUE, xlab="State", ylab="Proportion", names.arg=onsets, cex.axis=1.3, cex.names=1.3, cex.lab=1.3)
legend(x="topright", legend=c("All Onsets", "Type 1 Masked", "Type 2 Masked"), fill=c("black","azure4","azure3"), cex=1.3)
dev.off()


pdf(paste(plotpath, "/misses2.pdf", sep=""), height = 7, width = 14, family="Times")
barplot(t(bp.data[-1]), main="", col=c("black","azure4","azure3"), beside=TRUE, xlab="State", ylab="Proportion", names.arg=onsets, cex.axis=1.3, cex.names=1.3, cex.lab=1.3, legend=c("All Onsets", "Type 1 Masked", "Type 2 Masked"), args.legend=list(x="topright", fill=c("black","azure4","azure3"), cex=1.3))
dev.off()


tiff(paste(plotpath, "/misses2.tiff", sep=""), width=480, height=240, family="Times")
barplot(t(bp.data[-1]), main="", col=c("black","azure4","azure3"), beside=TRUE, xlab="State", ylab="Proportion", names.arg=onsets, cex.axis=1.3, cex.names=1.3, cex.lab=1.3, legend=c("All Onsets", "Type 1 Masked", "Type 2 Masked"), args.legend=list(x="topright", fill=c("black","azure4","azure3"), cex=0.75))
dev.off()





